import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from arcgis.gis import GIS
from arcgis.features import GeoAccessor, GeoSeriesAccessor
# Connect to the GIS with no credentials supplied; the connection is used
# below only to create map widgets.
gis = GIS()

# Read the house listings from the CSV file into a DataFrame.
csv_path = 'resources/houses_for_sale_filtered.csv'
prop_df = pd.read_csv(filepath_or_buffer=csv_path)

# Quick inspection: first three rows, (rows, columns) and column labels.
prop_df.head(n=3)
prop_df.shape
prop_df.columns
Drop redundant columns
# Remove leftover index columns that an earlier CSV round-trip may have added.
# errors='ignore' skips any label that is absent, so each column is handled
# independently: a missing 'Unnamed: 0' no longer prevents 'Unnamed: 0.1'
# from being dropped, and unrelated errors are no longer silently swallowed.
prop_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], inplace=True, errors='ignore')
Convert to Spatially Enabled DataFrame
# Build point geometries from the longitude / latitude columns so the frame
# can be used through the `.spatial` accessor.
# NOTE(review): from_xy may attach the geometry column to prop_df itself;
# later cells use prop_df and prop_sdf interchangeably - confirm.
prop_sdf = pd.DataFrame.spatial.from_xy(
    prop_df,
    x_column='LONGITUDE',
    y_column='LATITUDE',
)
type(prop_sdf)
# Create a map widget for the 'Portland, OR' location and give it the
# 'streets' basemap.
pdx_map = gis.map('Portland, OR')
pdx_map.basemap = 'streets'
# Bare expression: in a notebook this shows the widget as the cell output.
pdx_map

# Draw every listing on the widget; no renderer options are passed, so the
# plot call's defaults are used.
prop_sdf.spatial.plot(map_widget=pdx_map)
# Second widget for the same location, on the 'gray' basemap.
pdx_density_map = gis.map('Portland, OR')
pdx_density_map.basemap='gray'
# Bare expression: in a notebook this shows the widget as the cell output.
pdx_density_map

# renderer_type='h' asks the plot call for a heatmap renderer, so the map
# shows where listings cluster rather than individual points.
prop_sdf.spatial.plot(map_widget=pdx_density_map, renderer_type='h')
# Map of listings colored by construction year.
pdx_age_map = gis.map("Portland, OR")
pdx_age_map.basemap = 'gray-vector'
pdx_age_map

# Renderer options, collected in one place and handed to the plot call.
age_plot_options = dict(
    renderer_type='c',                   # class breaks renderer
    method='esriClassifyNaturalBreaks',  # classification scheme
    class_count=10,                      # ten classes; boundaries are chosen by the scheme
    col='YEAR BUILT',                    # column that drives the classes
    cmap='Blues',                        # matplotlib color map
    alpha=0.7,
    outline_color=[0, 0, 0, 0],
)
prop_sdf.spatial.plot(map_widget=pdx_age_map, **age_plot_options)
# Class-break definitions of the renderer on the age map's first layer.
age_class_breaks = pdx_age_map.layers[0].layer.layerDefinition.drawingInfo.renderer.classBreakInfos

# Upper bound of every class, and its fill color rescaled from 0-255 to the
# 0-1 range matplotlib expects.
cbs_list = [cb.classMaxValue for cb in age_class_breaks]
cmap_list = [[x / 255.0 for x in cb.symbol.color] for cb in age_class_breaks]

# Each break stores only its upper bound. Using those N values alone as bin
# edges yields N-1 bins: the first class disappears and every bar is painted
# with the color of the class below it. Prepending the smallest value in the
# column restores the lower edge of the first class, giving one bin per class.
year_built = prop_sdf['YEAR BUILT']
bin_edges = [year_built.min()] + cbs_list
n, bins, patches = plt.hist(year_built, bins=bin_edges)

# Paint each bar with the color its class has on the map.
for patch, class_color in zip(patches, cmap_list):
    plt.setp(patch, 'facecolor', class_color)
plt.title('Histogram of YEAR BUILT column')
# Map of listings colored by asking price.
pdx_price_map = gis.map("Portland, OR")
pdx_price_map.basemap = 'gray-vector'
pdx_price_map

# Renderer options, collected in one place and handed to the plot call.
price_plot_options = dict(
    renderer_type='c',              # class breaks renderer
    method='esriClassifyQuantile',  # classification scheme
    class_count=10,                 # ten classes
    col='PRICE',                    # column that drives the classes
    cmap='BuPu_r',                  # matplotlib color map
    alpha=0.5,
    outline_color=[50, 0, 0, 50],
    line_width=1,
)
prop_sdf.spatial.plot(map_widget=pdx_price_map, **price_plot_options)
# Class-break definitions of the renderer on the price map's first layer.
price_class_breaks = pdx_price_map.layers[0].layer.layerDefinition.drawingInfo.renderer.classBreakInfos

# Upper bound of every class, and its fill color rescaled from 0-255 to the
# 0-1 range matplotlib expects.
cbs_list = [cb.classMaxValue for cb in price_class_breaks]
cmap_list = [[x / 255.0 for x in cb.symbol.color] for cb in price_class_breaks]

# Each break stores only its upper bound. Using those N values alone as bin
# edges yields N-1 bins: the first class disappears and every bar is painted
# with the color of the class below it. Prepending the smallest value in the
# column restores the lower edge of the first class, giving one bin per class.
price = prop_sdf['PRICE']
bin_edges = [price.min()] + cbs_list
n, bins, patches = plt.hist(price, bins=bin_edges)

# Paint each bar with the color its class has on the map.
for patch, class_color in zip(patches, cmap_list):
    plt.setp(patch, 'facecolor', class_color)
plt.title('Histogram of PRICE column')
# Map of listings colored by floor area.
pdx_size_map = gis.map("Portland, OR")
pdx_size_map.basemap = 'gray-vector'
pdx_size_map

# Renderer options, collected in one place and handed to the plot call.
size_plot_options = dict(
    renderer_type='c',                   # class breaks renderer
    method='esriClassifyNaturalBreaks',  # classification scheme
    class_count=10,                      # ten classes
    col='SQUARE FEET',                   # column that drives the classes
    cmap='RdBu',                         # matplotlib color map
    alpha=0.7,
    outline_color=[50, 0, 0, 50],
    line_width=1,
)
prop_sdf.spatial.plot(map_widget=pdx_size_map, **size_plot_options)
# Class-break definitions of the renderer on the size map's first layer.
size_class_breaks = pdx_size_map.layers[0].layer.layerDefinition.drawingInfo.renderer.classBreakInfos

# Upper bound of every class, and its fill color rescaled from 0-255 to the
# 0-1 range matplotlib expects.
cbs_list = [cb.classMaxValue for cb in size_class_breaks]
cmap_list = [[x / 255.0 for x in cb.symbol.color] for cb in size_class_breaks]

# Each break stores only its upper bound. Using those N values alone as bin
# edges yields N-1 bins: the first class disappears and every bar is painted
# with the color of the class below it. Prepending the smallest value in the
# column restores the lower edge of the first class, giving one bin per class.
square_feet = prop_sdf['SQUARE FEET']
bin_edges = [square_feet.min()] + cbs_list
n, bins, patches = plt.hist(square_feet, bins=bin_edges)

# Paint each bar with the color its class has on the map.
for patch, class_color in zip(patches, cmap_list):
    plt.setp(patch, 'facecolor', class_color)
plt.title('Histogram of SQUARE FEET column')
# Map of listings colored by monthly HOA fee.
pdx_hoa_map = gis.map("Portland, OR")
pdx_hoa_map.basemap = 'gray-vector'
pdx_hoa_map

# Only listings that charge an HOA fee are drawn; rows with a fee of 0 or a
# missing fee fail the `> 0` test and are left off the map.
# The subset is taken from prop_sdf (the frame returned by from_xy) rather
# than prop_df, so the `.spatial` call does not depend on from_xy having
# modified prop_df in place.
prop_sdf_hoa_2 = prop_sdf[prop_sdf['HOA PER MONTH'] > 0]
prop_sdf_hoa_2.spatial.plot(
    map_widget=pdx_hoa_map,
    renderer_type='c',              # class breaks renderer
    method='esriClassifyQuantile',  # classification scheme
    class_count=10,                 # ten classes
    col='HOA PER MONTH',            # column that drives the classes
    cmap='RdBu',                    # matplotlib color map
    alpha=0.7,
    outline_color=[0, 0, 0, 0],
    line_width=0,
)
# Class-break definitions of the renderer on the HOA map's first layer.
hoa_class_breaks = pdx_hoa_map.layers[0].layer.layerDefinition.drawingInfo.renderer.classBreakInfos

# Upper bound of every class, and its fill color rescaled from 0-255 to the
# 0-1 range matplotlib expects.
cbs_list = [cb.classMaxValue for cb in hoa_class_breaks]
cmap_list = [[x / 255.0 for x in cb.symbol.color] for cb in hoa_class_breaks]

# The map was drawn from prop_sdf_hoa_2 (fees above zero), so the histogram
# uses that same subset to describe exactly the mapped listings.
hoa_fee = prop_sdf_hoa_2['HOA PER MONTH']

# Each break stores only its upper bound. Using those N values alone as bin
# edges yields N-1 bins: the first class disappears and every bar is painted
# with the color of the class below it. Prepending the smallest mapped fee
# restores the lower edge of the first class, giving one bin per class.
bin_edges = [hoa_fee.min()] + cbs_list
n, bins, patches = plt.hist(hoa_fee, bins=bin_edges)

# Paint each bar with the color its class has on the map.
for patch, class_color in zip(patches, cmap_list):
    plt.setp(patch, 'facecolor', class_color)
plt.title('Histogram of HOA PER MONTH column')

# One 25-bin histogram per numeric column, laid out on a 4 x 4 grid.
hist_options = {'bins': 25, 'layout': (4, 4), 'figsize': (15, 12)}
ax_list = prop_df.hist(**hist_options)
Explore the frequency of categorical columns
# Two side-by-side bar charts: how often each city and each property type
# occurs in the listings.
fig2, ax2 = plt.subplots(1, 2, figsize=(10, 5))

category_charts = [
    ('CITY', 'City name frequency'),
    ('PROPERTY TYPE', 'Property type frequency'),
]
for axis, (column, chart_title) in zip(ax2, category_charts):
    prop_df[column].value_counts().plot(kind='bar', ax=axis, title=chart_title)
    # Tilt the tick labels so long category names stay readable.
    axis.tick_params(labelrotation=45)

plt.tight_layout()
# Shortlist criteria, all of which must hold for a listing to be kept.
# The mask is built from prop_sdf, the same frame it is applied to, instead
# of mixing in prop_df; the result no longer relies on the two frames
# sharing an identical index.
# NOTE(review): comparisons against a missing value are False, so listings
# with no 'HOA PER MONTH' value are excluded - confirm that is intended.
shortlist_mask = (
    (prop_sdf['BEDS'] >= 2)
    & (prop_sdf['BATHS'] > 1)
    & (prop_sdf['HOA PER MONTH'] <= 200)
    & (prop_sdf['YEAR BUILT'] >= 2000)
    & (prop_sdf['SQUARE FEET'] > 2000)
    & (prop_sdf['PRICE'] <= 700000)
)
filtered_df = prop_sdf[shortlist_mask]

# Size of the shortlist, then the full and filtered sizes side by side.
filtered_df.shape
(prop_sdf.shape, filtered_df.shape)

# Distribution of every numeric column among the shortlisted listings.
ax_list2 = filtered_df.hist(bins=25, layout=(4, 4), figsize=(15, 15))
# Map of the shortlisted listings, colored by asking price.
pdx_filtered_map = gis.map("Portland, OR")
pdx_filtered_map.basemap = 'gray-vector'
pdx_filtered_map

# Renderer options, collected in one place and handed to the plot call.
filtered_plot_options = dict(
    renderer_type='c',                   # class breaks renderer
    method='esriClassifyNaturalBreaks',  # classification scheme
    class_count=10,                      # ten classes
    col='PRICE',                         # column that drives the classes
    cmap='Blues',                        # matplotlib color map
    alpha=0.7,
    outline_color=[0, 0, 0, 0],
)
filtered_df.spatial.plot(map_widget=pdx_filtered_map, **filtered_plot_options)
So far, we used attribute queries to explore and filter out properties. We have not yet used GIS analysis to narrow them further. Before that, let us save our work to disk.
# Write the attribute-filtered listings to disk.
# NOTE(review): to_csv writes the row index by default, and reading such a
# file back produces an 'Unnamed: 0' column like the ones dropped near the
# top of this notebook. Consider index=False if the index is not needed
# downstream - confirm with whatever reads this file.
filtered_df.to_csv('resources/houses_for_sale_att_filtered.csv')